import sys
import json

from config import getConfigParams
from requestUrl import getDocByUrl,getEntryIdByUrl
from logs import saveLog
from formatContent import formatContent

# Script flow: read the run configuration from the command line, fetch the
# article page, extract cover / title / body / author, then log and print the
# result as JSON.

# Load the configuration parameters from the command-line arguments.
configParams = getConfigParams(sys.argv)
url = configParams['real_url']
env = configParams['env']

# Resolve the entry ID (i.e. the article ID) from the base URL.
articleId = getEntryIdByUrl(configParams['base_url'])

# Fetch the HTML document; per the original note this is a PyQuery object.
doc = getDocByUrl(url)

# Cover image.
# Fallback (temporary): use the first image found inside the first <figure>.
coverUrl = ''
figures = doc('figure')
if figures:
    firstFigure = doc(figures[0])
    # attr() yields None when there is no <img>/src; keep cover_url a string.
    coverUrl = firstFigure('img').attr('src') or ''

# Preferred: when the page has a header (background) image, it overrides the
# fallback. The previous code reset coverUrl to '' at this point, which
# silently discarded the <figure> fallback computed above.
headerImage = doc('.DailyHeader-image img')
if headerImage:
    coverUrl = headerImage.attr('src') or ''

# Title.
title = doc('.DailyHeader-title').text()

# Article body as raw HTML.
articleContent = doc('.answer .content').html()

# Author: keep only the text after the first '/' (the whole text is kept when
# there is no '/', because find() returns -1 and the slice then starts at 0).
authorName = doc('.ZhihuDaily-Author').text()
authorName = authorName[authorName.find('/') + 1:]

# Run the body through formatContent to filter its tags
# (original note attributes this helper to Li Shiquan).
content = formatContent(articleContent, env=env)

result = {
    'entry_id': articleId,
    'author_name': authorName,
    'cover_url': coverUrl,
    'public_time': 0,
    'title': title,
    'not_format_content': articleContent,
    'content': content,
}
logData = {
    'params': sys.argv,
    'result': result,
}
# Persist the invocation arguments together with the result, then emit the
# result on stdout. ensure_ascii=False keeps non-ASCII text readable.
saveLog(json.dumps(logData, ensure_ascii=False))
print(json.dumps(result, ensure_ascii=False))
